#' Normalise the candidate names (`nume`) of an admission data set.
#'
#' Upper-cases the names, repairs known per-year data-entry / encoding
#' artefacts, folds diacritics to plain ASCII letters, strips punctuation and
#' finally collapses whitespace. Hyphens are turned into single spaces at the
#' very end.
#'
#' The replacement rules are order dependent (later rules see the output of
#' earlier ones), so they are kept in their historical order.
#'
#' @param data_adm A data frame (or list) with a `nume` column holding the
#'   raw names. It is not modified.
#' @param year A single, non-missing year. It only selects the small set of
#'   year-specific fixes (2004, 2006-2009, 2012, 2013).
#' @return A character vector, the same length as `data_adm$nume`, with the
#'   cleaned names. Names that contain `[` (anonymised entries) and two known
#'   junk entries (school names typed in the name field) are returned as `NA`.
clean__adm__clean_names <- function(data_adm, year) {
  stopifnot(length(year) == 1L, !is.na(year))
  if (is.null(data_adm[["nume"]])) {
    stop("`data_adm` must contain a `nume` column.", call. = FALSE)
  }

  # Apply c(pattern, replacement) pairs with gsub(), strictly in list order.
  # An NA replacement turns every matching element into NA (see ?gsub).
  replace_in_order <- function(x, rules) {
    for (rule in rules) {
      x <- gsub(rule[[1L]], rule[[2L]], x)
    }
    x
  }

  # Trim both ends, then collapse every whitespace run to one space.
  squeeze_spaces <- function(x) {
    gsub("\\s+", " ", trimws(x))
  }

  # Work on the argument that was passed in, not on a global data frame.
  nume <- toupper(data_adm[["nume"]])

  # 2013 ----
  nume <- replace_in_order(nume, list(
    c("’", "AR"),   # typographic apostrophe stands in for "AR"
    # NOTE(review): an older comment said this should map to "AN"; the code
    # has always written "AR", which is kept here. Confirm against the data.
    c("Ž", "AR"),
    c("\\t", ""),   # drop tab characters
    c("\\?", "UL")  # question mark stands in for "UL"
  ))

  # A square bracket marks an anonymised name: discard it.
  nume[grepl("\\[", nume)] <- NA_character_

  # 2012, 2011, 2010, 2009 ----
  nume <- replace_in_order(nume, list(
    # 2012
    c("/", "-"),
    c("-\\.", " "),
    c("119", ""),
    c("SCOALA CU CLASELE I-VIII NR.1 COMARNA", NA),
    # 2011
    c("R\\\\", ""),  # "R" followed by a backslash, 1 occurrence
    c("W\\\\", ""),  # "W" followed by a backslash, 1 occurrence
    c("SCOALA CU CLASELE I-VIII NR 97 SCOALA CU CLASELE I-VIII NR. 97 BUCURESTI", NA),
    c("Ś", "S"),     # actual name unclear (VŚCEANU); fold Ś to S
    c(";", ""),
    # 2010
    c(",", ""),
    # 2009
    c("5", ""),
    c("`", "'")
  ))

  # Spacing et al ----
  nume <- squeeze_spaces(nume)
  nume <- replace_in_order(nume, list(
    c(" -", "-"),
    c("- ", "-")
  ))
  nume <- toupper(nume)
  nume <- gsub("\\.", "", nume)

  # A retired, commented-out 2014 branch used to live here. It looked up the
  # odd characters by position in the sorted character sets of
  # admitere_2014.csv / bacalaureat_2018.csv because they could not be typed
  # reliably. NOTE(review): the literal 2014 rules below appear to be its
  # replacement - recover the old branch from version control if needed.

  # Fold diacritics to ASCII ----
  # Entries that look identical are kept on purpose: they may be different
  # encodings of the same glyph (precomposed vs. combining) - TODO confirm.
  nume <- replace_in_order(nume, list(
    c("Ș", "S"),
    c("Ş", "S"),
    c("Ț", "T"),
    c("Ţ", "T"),
    c("Ă", "A"),
    c("Ă", "A"),
    c("Ã", "A"),
    c("Â", "A"),
    c("Ä", "A"),
    c("Á", "A"),
    c("Ӧ", "O"),
    c("Č", "C"),
    c("Ć", "C"),
    c("Ç", "C"),
    c("É", "E"),
    c("Ë", "E"),
    c("Ğ", "G"),
    c("İ", "I"),
    c("Í", "I"),
    c("Î", "I"),
    c("Ő", "O"),
    c("Ö", "O"),
    c("Ó", "O"),
    c("Õ", "O"),
    c("Š", "S"),
    c("Ű", "U"),
    c("Ú", "U"),
    c("Ü", "U"),
    c("Ý", "Y"),
    c("Ǎ", "A"),
    c("Ȋ", "I"),
    c("Ṣ", "S"),
    c("Ṭ", "T"),
    c("Ô", "O"),
    c("È", "E"),
    c("Ā", "A"),
    c("Ǎ", "A"),
    c("Ś", "S"),
    c("Ÿ", "Y"),
    c("Ǎ", "A"),
    c("Ϋ", "Y"),
    c("Ϋ", "Y")
  ))

  # 2014: mis-decoded (mojibake) sequences ----
  nume <- replace_in_order(nume, list(
    c("Ĺ˘", "T"),
    c("Ҫ", "C"),
    c("†", ""),
    c("‰", ""),
    c("Û", "U"),
    c(" A€“ ", "-"),
    c("A“", "O"),
    c("–", "-")
  ))

  # Year-specific fixes ----
  # NOTE(review): the 2013, 2012, 2009 and 2006 rules, and the backtick rule
  # of 2007, look unreachable because the same characters were already
  # replaced unconditionally above. They are kept so the rule order stays
  # exactly as it was.
  year_rules <- switch(
    as.character(year),
    "2013" = list(c("Ž", "AN")),
    "2012" = list(c("’", "AR")),
    "2009" = list(c("`", "'")),
    "2008" = list(c("0", "O")),
    "2007" = list(
      c("_", "-"),
      c("`", "")       # appears at the end of one string
    ),
    "2006" = list(c("\\?", "A")),  # appears once in S?rbu
    "2004" = list(c("_", "T")),    # appears once in Ionu_
    list()
  )
  nume <- replace_in_order(nume, year_rules)

  # Leftovers ----
  nume <- replace_in_order(nume, list(
    c("\\\\", ""),     # backslash; appears at the end of a string in 2005
    c("\t", "AN"),
    c("\u0083", ""),   # control character 131
    c("\u0090", ""),   # control character 144
    c("\u008d", ""),   # control character 141
    # 2014
    c("ĹŽ", "S"),      # L + Z stands for S (sh actually)
    c("ĹŁ", "T"),      # L + crossed L stands for T
    c("Ž", "Z"),
    c("Ś", "S"),
    c("‚", ""),
    # 2013
    c("Ý", "Y"),
    # 2018
    c("À", "A"),
    c("Ê", "E"),
    c("Î", "I"),
    c(",", ""),        # comma to blank
    c("\\.", "")       # . to blank
  ))

  # Final spacing ----
  # Remove whitespace on either side of any dash-like character.
  nume <- gsub("\\s+(?=\\p{Pd})|(?<=\\p{Pd})\\s+", "", nume, perl = TRUE)
  nume <- squeeze_spaces(nume)

  # Hyphenated names are returned space-separated.
  gsub("-", " ", trimws(nume))
}